#!/usr/bin/env bash
#
# Start a detached vLLM OpenAI-compatible API server from a ROCm image,
# serving a local GGUF build of DeepSeek-R1-Distill-Llama-70B.
#
# Usage:
#   API_KEY='<secret>' ./<this-script>
#
# Required environment:
#   API_KEY  Secret forwarded into the container. It is deliberately NOT
#            stored in this file: a key committed to source control must be
#            treated as leaked and rotated.
#            NOTE(review): upstream vLLM reads VLLM_API_KEY / --api-key;
#            presumably this image's entrypoint consumes API_KEY — confirm.
#
# The server listens on port 8000 inside the container, published on host
# port 5536.

set -eu

# Abort before touching docker if the secret was not supplied, then export it
# so the docker client can see it for the `--env API_KEY` pass-through below.
: "${API_KEY:?API_KEY must be set in the environment (do not hard-code it here)}"
export API_KEY

# The model directory name doubles as the served model name and the GGUF
# file's base name, so define it once.
model_name='DeepSeek-R1-Distill-Llama-70B-Q4_K_M'
model_dir="/models/${model_name}"   # path as seen inside the container

# Docker options come before the image name; everything after the image name
# is passed as arguments to the container's entrypoint.
#   --device /dev/kfd, /dev/dri + --group-add video : expose the GPU device
#       nodes to the container.
#   --ipc=host                        : share the host IPC namespace.
#   --env API_KEY (no "=value")       : copy the value from this shell's
#       environment, keeping the secret out of the command line.
#   HF_HUB_OFFLINE=1                  : the model, tokenizer and generation
#       config are all given as local paths under the /models mount.
docker run -it \
  -d \
  --ipc=host \
  -p 5536:8000 \
  --device /dev/kfd \
  --device /dev/dri \
  --group-add video \
  -v /data/models:/models \
  --env "HF_HUB_OFFLINE=1" \
  --env API_KEY \
  --security-opt seccomp=unconfined \
  rcatrangiu/vllm-openai-rocm:v0.8.2 \
  --model "${model_dir}/${model_name}.gguf" \
  --generation-config "${model_dir}" \
  --tokenizer "${model_dir}" \
  --served-model-name "${model_name}" \
  --quantization gguf \
  --gpu-memory-utilization 0.95 \
  --host 0.0.0.0 \
  --port 8000 \
  --tensor-parallel-size 1
